2 require_once ( "Parser.php" ) ;
5 * This should one day become the XML->(X)HTML parser
6 * Based on work by Jan Hidders and Magnus Manske
8 * $wgUseXMLparser = true ;
9 * $wgEnableParserCache = false ;
10 * $wgWiki2xml to the path and executable of the command line version (cli)
11 * in LocalSettings.php
13 * @subpackage Experimental
17 * the base class for an element
22 var $children = array();
25 * This finds the ATTRS element and returns the ATTR sub-children as a single string
/**
 * Finds the ATTRS child element and returns its ATTR sub-children as a
 * single attribute string (see getTheseAttrs()).
 *
 * If more than one ATTRS child is present, the last one wins.
 *
 * @return string the attribute string, or "" when there is no ATTRS child
 */
function getSourceAttrs ()
{
	$ret = "" ;
	foreach ( $this->children as $child ) {
		if ( !is_string ( $child ) AND $child->name == "ATTRS" ) {
			# Ask the ATTRS element for its attributes directly. Going through
			# makeXHTML() would need a parser object, which this method does
			# not receive; for an ATTRS element makeXHTML() only delegates to
			# getTheseAttrs() anyway.
			$ret = $child->getTheseAttrs () ;
		}
	}
	return $ret ;
}
41 * This collects the ATTR thingies for getSourceAttrs()
/**
 * Collects the ATTR children of this (ATTRS) element for getSourceAttrs().
 *
 * Each ATTR child contributes one NAME='value' pair, where NAME comes from
 * the child's NAME attribute and the value from its first text child.
 * Pairs are joined with single spaces.
 *
 * @return string the attribute list, or "" when there are no ATTR children
 */
function getTheseAttrs ()
{
	$ret = array () ;
	foreach ( $this->children as $child ) {
		if ( is_string ( $child ) OR $child->name != "ATTR" ) continue ;

		# An ATTR without a NAME cannot be turned into an attribute
		if ( !isset ( $child->attrs["NAME"] ) ) continue ;

		# An empty ATTR has no text child; treat it as an empty value
		$value = "" ;
		if ( isset ( $child->children[0] ) AND is_string ( $child->children[0] ) ) {
			$value = $child->children[0] ;
		}

		# The value is wrapped in single quotes, so quotes and markup
		# characters inside it must be escaped to keep the attribute intact.
		# NOTE(review): the attribute name itself is still emitted verbatim.
		$ret[] = $child->attrs["NAME"] . "='" . htmlspecialchars ( $value , ENT_QUOTES ) . "'" ;
	}
	return implode ( " " , $ret ) ;
}
57 * This function generates the XHTML for the entire subtree
# Generates the XHTML for the children of this element, optionally wrapped in $tag.
#   $parser : passed on (by reference) to each child's makeXHTML()
#   $tag    : tag name used for the closing "</tag>" written below; defaults to ""
#   $attr   : attribute string supplied by the caller; defaults to ""
# NOTE(review): several statements of this method are not visible in this excerpt,
# so only the visible steps are described here.
59 function sub_makeXHTML ( &$parser , $tag = "" , $attr = "" )
# Attributes found in the source (the ATTRS child), as a single string
63 $attr2 = $this->getSourceAttrs () ;
# Add a separating space only when both attribute strings are non-empty
64 if ( $attr != "" AND $attr2 != "" ) $attr .= " " ;
# A non-empty attribute string is appended to the output after a space
70 if ( $attr != "" ) $ret .= " " . $attr ;
# Children are either plain strings or element objects; ATTRS elements are
# not rendered here, every other element is rendered through makeXHTML()
74 foreach ($this->children
as $child) {
75 if ( is_string($child) ) {
77 } else if ( $child->name
!= "ATTRS" ) {
78 $ret .= $child->makeXHTML ( $parser );
# Closing tag, followed by a newline
82 $ret .= "</" . $tag . ">\n" ;
# Produces the output for a link to $target.
# In the visible code this only returns one of three placeholder strings,
# depending on whether a language part or a namespace part was recognised.
# NOTE(review): the handling of targets with more than one ":"-separated part
# is not visible in this excerpt; $parser, $display_title and $options are not
# used by any visible line - confirm against the full file.
86 function createInternalLink ( &$parser , $target , $display_title , $options )
88 $tp = explode ( ":" , $target ) ; # tp = target parts
89 $title = "" ; # The plain title
90 $language = "" ; # The language/meta/etc. part
91 $namespace = "" ; # The namespace, if any
92 $subtarget = "" ; # The '#' thingy
# A target without any ":" is used as the title unchanged
93 if ( count ( $tp ) == 1 ) $title = $target ; # Plain and simple case
99 if ( $language != "" ) # External link within the WikiMedia project
101 return "{language link}" ;
103 else if ( $namespace != "" ) # Link to another namespace, check for image/media stuff
105 return "{namespace link}" ;
# Neither a language nor a namespace part was set
109 return "{internal link}" ;
/**
 * Renders a LINK element.
 *
 * The LINKTARGET child supplies the link target; every other element child
 * is collected as an option. The last option is used as the display title,
 * and the remaining ones are handed to createInternalLink() as options.
 *
 * @param object $parser passed on to makeXHTML() and createInternalLink()
 * @return string whatever createInternalLink() produces
 */
function makeInternalLink ( &$parser )
{
	# Start from defined values so that a LINK without a LINKTARGET, or
	# without any option children, is still handled cleanly
	$target = "" ;
	$option = array () ;

	foreach ( $this->children as $child ) {
		if ( is_string ( $child ) ) {
			# Plain text directly inside a LINK shouldn't be the case; ignore it
			continue ;
		}
		$rendered = trim ( $child->makeXHTML ( $parser ) ) ;
		if ( $child->name == "LINKTARGET" ) {
			$target = $rendered ;
		} else {
			$option[] = $rendered ;
		}
	}

	# Without options, the target doubles as the display title
	if ( count ( $option ) == 0 ) $option[] = $target ;
	$display_title = array_pop ( $option ) ;

	return $this->createInternalLink ( $parser , $target , $display_title , $option ) ;
}
134 * This function actually converts wikiXML into XHTML tags
# Converts this wikiXML element into XHTML.
# Step 1: an EXTENSION element whose NAME attribute (upper-cased) is a known
#         HTML tag is mapped onto the matching wiki element name.
# Step 2: the resulting name $n selects the output: most names are rendered by
#         sub_makeXHTML() with a fixed tag, LINK goes through makeInternalLink(),
#         and ATTRS returns its attribute string immediately.
# Step 3: the result is wrapped in newlines and "\n\n" is replaced by "\n".
# NOTE(review): some statements (initial values, the body of the NOWIKI branch,
# the final return) are not visible in this excerpt.
# NOTE(review): unknown extension names and unknown element names are written
# into the output as tag names verbatim - confirm the XML input is trusted.
136 function makeXHTML ( &$parser )
139 $n = $this->name
; # Shortcut
141 if ( $n == "EXTENSION" ) # Fix allowed HTML
144 $ext = strtoupper ( $this->attrs
["NAME"] ) ;
145 if ( $ext == "B" ||
$ext == "STRONG" ) $n = "BOLD" ;
146 else if ( $ext == "I" ||
$ext == "EM" ) $n = "ITALICS" ;
147 else if ( $ext == "U" ) $n = "UNDERLINED" ; # Hey, virtual wiki tag! ;-)
148 else if ( $ext == "S" ) $n = "STRIKE" ;
149 else if ( $ext == "P" ) $n = "PARAGRAPH" ;
150 else if ( $ext == "TABLE" ) $n = "TABLE" ;
151 else if ( $ext == "TR" ) $n = "TABLEROW" ;
152 else if ( $ext == "TD" ) $n = "TABLECELL" ;
153 else if ( $ext == "TH" ) $n = "TABLEHEAD" ;
154 else if ( $ext == "CAPTION" ) $n = "CAPTION" ;
155 else if ( $ext == "NOWIKI" ) $n = "NOWIKI" ;
# When the name was mapped, the NAME attribute is dropped; otherwise the name
# is cleared while the parser's nowiki counter is above zero
156 if ( $n != $old_n ) unset ( $this->attrs
["NAME"] ) ; # Cleanup
157 else if ( $parser->nowiki
> 0 ) $n = "" ; # No "real" wiki tags allowed
160 if ( $n == "ARTICLE" )
161 $ret .= $this->sub_makeXHTML ( $parser ) ;
162 else if ( $n == "HEADING" )
163 $ret .= $this->sub_makeXHTML ( $parser , "h" . $this->attrs
["LEVEL"] ) ;
164 else if ( $n == "PARAGRAPH" )
165 $ret .= $this->sub_makeXHTML ( $parser , "p" ) ;
166 else if ( $n == "BOLD" )
167 $ret .= $this->sub_makeXHTML ( $parser , "strong" ) ;
168 else if ( $n == "ITALICS" )
169 $ret .= $this->sub_makeXHTML ( $parser , "em" ) ;
171 # These don't exist as wiki markup
172 else if ( $n == "UNDERLINED" )
173 $ret .= $this->sub_makeXHTML ( $parser , "u" ) ;
174 else if ( $n == "STRIKE" )
175 $ret .= $this->sub_makeXHTML ( $parser , "strike" ) ;
178 else if ( $n == "LINK" )
179 $ret .= $this->makeInternalLink ( $parser ) ;
180 else if ( $n == "LINKTARGET" )
181 $ret .= $this->sub_makeXHTML ( $parser ) ;
182 else if ( $n == "LINKOPTION" )
183 $ret .= $this->sub_makeXHTML ( $parser ) ;
185 else if ( $n == "NOWIKI" )
188 $ret .= $this->sub_makeXHTML ( $parser , "" ) ;
192 # Unknown HTML extension
193 else if ( $n == "EXTENSION" ) # This is currently a dummy!!!
195 $ext = $this->attrs
["NAME"] ;
197 $ret .= "<" . $ext . ">" ;
198 $ret .= $this->sub_makeXHTML ( $parser ) ;
199 $ret .= "</" . $ext . "> " ;
203 else if ( $n == "TABLE" )
205 $ret .= $this->sub_makeXHTML ( $parser , "table" ) ;
207 else if ( $n == "TABLEROW" )
209 $ret .= $this->sub_makeXHTML ( $parser , "tr" ) ;
211 else if ( $n == "TABLECELL" )
213 $ret .= $this->sub_makeXHTML ( $parser , "td" ) ;
215 else if ( $n == "TABLEHEAD" )
217 $ret .= $this->sub_makeXHTML ( $parser , "th" ) ;
219 else if ( $n == "CAPTION" )
221 $ret .= $this->sub_makeXHTML ( $parser , "caption" ) ;
224 else if ( $n == "ATTRS" ) # SPECIAL CASE : returning attributes
226 return $this->getTheseAttrs () ;
230 else if ( $n == "LISTITEM" )
231 $ret .= $this->sub_makeXHTML ( $parser , "li" ) ;
232 else if ( $n == "LIST" )
# TYPE "bullet" gives an unordered list, anything else an ordered one
234 $type = "ol" ; # Default
235 if ( $this->attrs
["TYPE"] == "bullet" ) $type = "ul" ;
236 $ret .= $this->sub_makeXHTML ( $parser , $type ) ;
239 # Something else entirely
242 $ret .= "<" . $n . ">" ;
243 $ret .= $this->sub_makeXHTML ( $parser ) ;
244 $ret .= "</" . $n . "> " ;
# Surround with newlines, then replace each "\n\n" by "\n" (a single pass)
247 $ret = "\n{$ret}\n" ;
248 $ret = str_replace ( "\n\n" , "\n" , $ret ) ;
253 * A function for additional debugging output
257 $ret .= "<li> <b> Name: </b> $this->name </li>\n";
259 $ret .= '<li> <b> Attributes: </b>';
260 foreach ($this->attrs
as $name => $value) {
261 $ret .= "$name => $value; " ;
265 foreach ($this->children
as $child) {
266 if ( is_string($child) ) {
267 $ret .= "<li> $child </li>\n";
269 $ret .= $child->myPrint();
277 $ancStack = array(); // the stack with ancestral elements
279 // Three global functions needed for parsing, sorry guys
/**
 * Start-tag callback for the XML parser.
 *
 * Creates an element object for the tag that was just opened, records its
 * name and attributes, and pushes it onto the stack of open ancestors.
 *
 * @param resource $parser the XML parser invoking the callback (unused)
 * @param string   $name   name of the opened element
 * @param array    $attrs  attributes of the opened element
 */
function wgXMLstartElement($parser, $name, $attrs) {
	global $ancStack, $rootElem;

	$opened = new element();
	$opened->attrs = $attrs;
	$opened->name = $name;

	// the new element stays on the stack until its end tag is seen
	array_push($ancStack, $opened);
}
/**
 * End-tag callback for the XML parser.
 *
 * Takes the element that was just closed off the ancestor stack. If other
 * elements are still open, the closed one becomes a child of the innermost
 * of them; otherwise it is the document root and is stored in $rootElem.
 *
 * @param resource $parser the XML parser invoking the callback (unused)
 * @param string   $name   name of the closed element (unused)
 */
function wgXMLendElement($parser, $name) {
	global $ancStack, $rootElem;

	$finished = array_pop($ancStack);
	$depth = count($ancStack);

	if ($depth != 0) {
		// still inside a parent: attach the finished element to it
		array_push($ancStack[$depth - 1]->children, $finished);
	} else {
		// nothing left open: this was the root element
		$rootElem = $finished;
	}
}
/**
 * Character-data callback for the XML parser.
 *
 * Adds text to the innermost open element. The parser may deliver a single
 * text node in several pieces (for instance around entities), so a piece
 * that directly follows another piece of text is appended to it unchanged.
 * Trimming every piece on its own would glue words together, e.g.
 * "cool ", "&", " stuff" would become "cool&stuff".
 *
 * A piece that starts a new text node has its leading whitespace removed and
 * is dropped when nothing remains, so whitespace-only text between elements
 * still adds no children. Trailing whitespace of a text node is kept.
 *
 * @param resource $parser the XML parser invoking the callback (unused)
 * @param string   $data   the piece of character data
 */
function wgXMLcharacterData($parser, $data) {
	global $ancStack;

	// text outside of any element has no parent to go to
	if (!$ancStack) {
		return;
	}

	$top = count($ancStack) - 1;
	$last = count($ancStack[$top]->children) - 1;

	// continuation of the text node in progress: keep the data untouched
	if ($last >= 0 && is_string($ancStack[$top]->children[$last])) {
		$ancStack[$top]->children[$last] .= $data;
		return;
	}

	// start of a new text node: don't add blank lines, they're no use
	$data = ltrim($data);
	if ($data !== "") {
		array_push($ancStack[$top]->children, $data);
	}
}
312 * Here's the class that generates a nice tree
/**
 * Parses the XML file $filename into a tree of element objects.
 *
 * The file is read in 4096-byte blocks and fed to an XML parser whose
 * callbacks (wgXMLstartElement, wgXMLendElement, wgXMLcharacterData) build
 * the tree through the globals $ancStack and $rootElem.
 *
 * Terminates the script with a message if the file cannot be opened or the
 * XML is not well-formed.
 *
 * @param string $filename path of the XML file to read
 * @return object reference to the root element left in $rootElem
 */
function &scanFile( $filename ) {
	global $ancStack, $rootElem;
	$ancStack = array();

	$xml_parser = xml_parser_create();
	xml_set_element_handler ($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
	xml_set_character_data_handler ($xml_parser, 'wgXMLcharacterData');

	$fp = fopen($filename, 'r');
	if (!$fp) {
		die('could not open XML input');
	}

	// Compare explicitly against false and "" so that a block consisting
	// only of the character "0" does not end the loop early
	while (($data = fread($fp, 4096)) !== false && $data !== '') {
		if (!xml_parse($xml_parser, $data, feof($fp))) {
			die(sprintf("XML error: %s at line %d",
				xml_error_string(xml_get_error_code($xml_parser)),
				xml_get_current_line_number($xml_parser)));
		}
	}

	// release the file handle as well as the parser
	fclose($fp);
	xml_parser_free($xml_parser);

	// return the root element stored by the end-element callback
	return $rootElem;
}
/**
 * Parses the XML document held in $input into a tree of element objects.
 *
 * The whole string is handed to the XML parser in one call; the callbacks
 * wgXMLstartElement, wgXMLendElement and wgXMLcharacterData build the tree
 * through the globals $ancStack and $rootElem.
 *
 * Terminates the script with a message if the XML is not well-formed.
 *
 * @param string $input the complete XML document
 * @return object the root element left in $rootElem
 */
function scanString ( $input ) {
	global $ancStack, $rootElem;
	$ancStack = array();

	$xml_parser = xml_parser_create();
	xml_set_element_handler ($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
	xml_set_character_data_handler ($xml_parser, 'wgXMLcharacterData');

	// the input is complete, so this is also the final chunk
	$parsed = xml_parse ($xml_parser, $input, true);
	if (!$parsed) {
		$reason = xml_error_string(xml_get_error_code($xml_parser));
		$line = xml_get_current_line_number($xml_parser);
		die (sprintf ("XML error: %s at line %d", $reason, $line));
	}
	xml_parser_free ($xml_parser);

	// return the root element stored by the end-element callback
	return $rootElem;
}
363 $filename = 'sample.xml';
364 $result = $w->scanFile( $filename );
365 print $result->myPrint();
368 $dummytext = "<article><heading level='2'> R-type </heading><paragraph><link><linktarget>image:a.jpg</linktarget><linkoption>1</linkoption><linkoption>2</linkoption><linkoption>3</linkoption><linkoption>text</linkoption></link></paragraph><paragraph>The <link><linktarget>video game</linktarget><linkoption>computer game</linkoption></link> <bold>R-type</bold> is <extension name='nowiki'>cool & stuff</extension> because:</paragraph><list type='bullet'><listitem>it's nice</listitem><listitem>it's fast</listitem><listitem>it has:<list type='bullet'><listitem>graphics</listitem><listitem>sound</listitem></list></listitem></list><table><tablerow><tablecell>Version 1 </tablecell><tablecell>not bad</tablecell></tablerow><tablerow><tablecell>Version 2 </tablecell><tablecell>much better </tablecell></tablerow></table><paragraph>This is a || token in the middle of text.</paragraph></article>" ;
370 class ParserXML
EXTENDS Parser
378 # Cleared with clearState():
379 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
380 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
383 var $mOptions, $mTitle, $mOutputType,
384 $mTemplates, // cache of already loaded templates, avoids
385 // multiple SQL queries for the same string
386 $mTemplatePath; // stores an unsorted hash of all the templates already loaded
387 // in this path. Used for loop detection.
# PHP4-style constructor: starts with empty template cache, template path and tag hooks.
# NOTE(review): since PHP 8.0 a method named after its class is no longer treated
# as a constructor, so this would not run on "new ParserXML" there - confirm the
# targeted PHP version.
# NOTE(review): the end of this method is not visible in this excerpt.
398 function ParserXML() {
399 $this->mTemplates
= array();
400 $this->mTemplatePath
= array();
401 $this->mTagHooks
= array();
/**
 * Puts the parser's per-parse fields back to their starting values and
 * installs a fresh ParserOutput object.
 */
function clearState() {
	# A new, empty output object
	$this->mOutput = new ParserOutput();

	# Scalar fields
	$this->mAutonumber = 0;
	$this->mLastSection = "";

	# Everything that starts out as false
	$this->mDTopen = $this->mVariables = $this->mInPre = false;

	# Everything that starts out as an empty array
	$this->mIncludeCount = $this->mStripState = $this->mArgStack = array();
}
# Converts $text by piping it through the external wiki2xml program and turning
# the resulting XML into XHTML; the result is stored in and returned as mOutput.
# NOTE(review): many lines of this method are not visible in this excerpt (for
# example where $wgWiki2xml and $w come from, and whether the temporary file is
# closed and removed); $title, $options, $linestart and $clearState are not used
# by any visible line.
422 function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
# The text goes into a temporary file that is fed to the converter on stdin
424 $tmpfname = tempnam("/tmp", "FOO");
426 $handle = fopen($tmpfname, "w");
427 fwrite($handle, $text);
# NOTE(review): the command line is built by plain concatenation, without
# escaping - confirm that $wgWiki2xml is trusted configuration
430 exec ( $wgWiki2xml . " < " . $tmpfname , $a ) ;
# exec() delivers the output as an array of lines; join them back together
431 $text = implode ( "\n" , $a ) ;
437 $result = $w->scanString( $text );
# The XHTML rendering and the debug print of the tree are appended to the
# XML text, each preceded by a horizontal rule
438 $text .= "<hr>" . $result->makeXHTML ( $this );
439 $text .= "<hr>" . $result->myPrint();
441 $this->mOutput
->setText ( $text ) ;
442 return $this->mOutput
;